__author__ = 'Sebastian Enger, M.Sc.' import sys import pprint import nltk from nltk.tokenize import sent_tokenize #nltk.download() reload(sys) sys.setdefaultencoding('utf8') pp = pprint.PrettyPrinter(indent=4) #delimiters = ['\n', ' ', ',', '.', '?', '!', ':', ';', '\s', '\t', '\r'] filename = sys.argv[-1] # read file into string text = open(filename, 'r').read() #text.decode('utf-8') sent_tokenize_list = sent_tokenize(text) for ele in sent_tokenize_list: word_list = ele.split() for word in word_list: if word.istitle(): print "Word: ",word, " -> gross geschrieben\n" #pp.pprint(word_list)